import urllib
import urllib.request
from lxml import etree


# Fetch the given URL and return its decoded text (nothing is written to disk).


def openurl(url, timeout=3):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request carries a mobile Chrome ``User-Agent`` header and is
    attempted up to five times before giving up.

    Args:
        url: Address to request.
        timeout: Per-attempt timeout handed to ``urllib.request.urlopen``.

    Returns:
        The decoded page text, or ``None`` when every attempt failed.
    """
    max_tries = 5
    print("openurl2 " + url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Mobile Safari/537.36"
    }
    req = urllib.request.Request(url=url, headers=headers)

    for _ in range(max_tries):
        try:
            # The context manager closes the response even when reading or
            # decoding fails, so failed attempts do not leak connections.
            with urllib.request.urlopen(req, timeout=timeout) as response:
                return response.read().decode('utf-8')
        except Exception as exc:
            # Best-effort fetch: report the reason and try again.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit through.
            print(f"{url} error! ({exc!r})")

    print("Has tried %d times to access bid_url %s, all failed!" % (max_tries, url))
    return None


def parse(page, id):
    """Collect the first six cell texts of the rows that mention *id*.

    Rows are taken from the second ``<table>`` under any parent: every
    ``<tr>`` that is an ancestor of a ``<td>`` whose text contains *id* is
    selected, and the text nodes of its 1st..6th ``<td>`` are returned.

    Args:
        page: Parsed document exposing an lxml-style ``xpath(expr, **vars)``.
        id: Code to search for.  The name shadows the builtin but is kept so
            existing keyword callers keep working.

    Returns:
        A 6-tuple of lists of text nodes, one list per column (the caller
        labels them unit, department, position, code, recruit count and
        paid-applicant count).
    """
    # Bind the code as an XPath variable instead of pasting it into the
    # expression: an unquoted literal is parsed as a number (dropping leading
    # zeros) or, if non-numeric, as an element name test.
    code = str(id)
    template = (
        '//table[2]/tbody/tr/td[contains(text(),$code)]'
        '/ancestor::tr/td[{column}]/text()'
    )
    return tuple(
        page.xpath(template.format(column=column), code=code)
        for column in range(1, 7)
    )


if __name__ == '__main__':
    # Position codes to look up on every page.
    idlist = ['1110201', '1191101',
              '2050202',
              '2050601',
              '2050806',
              '2061603',
              '2090401',  # graduates of national "Double First-Class" universities
              '1090101',
              '2010201',
              '2091901'
              ]

    # Pages to report on, in the original order.  The original notes label
    # them 26th, 26th, 27th and 28th.
    # NOTE(review): two pages carry the "26th" label; the first is presumably
    # an earlier day -- confirm.
    urls = (
        'http://file.hrss.tj.gov.cn/GwyExamFile/resource/portalWeb/1645753337825.html',
        'http://file.hrss.tj.gov.cn/GwyExamFile/resource/portalWeb/1645841810197.html',
        'http://file.hrss.tj.gov.cn/GwyExamFile/resource/portalWeb/1645925560993.html',
        'http://file.hrss.tj.gov.cn/GwyExamFile/resource/portalWeb/1646010221435.html',
    )

    for url in urls:
        html = openurl(url)
        if not html:
            # openurl returns None once all retries fail; skip this page
            # instead of handing None to the HTML parser.
            print(f"skip {url}: download failed")
            continue

        page = etree.HTML(html)
        print('单位, 部门, 职位, 代码, 招考人数, 缴费人数')
        for position_id in idlist:
            print(parse(page, position_id))

